
** Cross-validation exercise: split training and estimation samples within zip code

***Inputs:
* $Data/GameUserWPdaily_norm.dta

***Outputs: 
* $Data/Trainingzips_time.dta
* $Data/timesplitdirectionsmonth_zip260.dta
* $Results/timesplitsample_highpm
			
* ---------------------------------------------------------------------------
* Step 1: build the month-level training / estimation split within zip code.
* Reads the daily panel and writes Trainingzips_time.dta with one row per
* zip x calendar month and trainingsample = 1 (training) or 0 (estimation).
* ---------------------------------------------------------------------------
use "$Data/GameUserWPdaily_norm.dta", clear

	set seed 18112022

	** Drop zip codes with fewer than 100 daily observations
	bysort zip: gen zipobs = _N
	drop if zipobs < 100

	** Collapse to one row per zip x calendar month
	gen monthyear = mofd(date)
	keep zip monthyear
	duplicates drop

	** Pin the row order on the unique key before drawing random numbers, so
	** that the draw attached to each zip-month depends only on the seed and
	** not on the incoming order of the daily file.
	sort zip monthyear

	** Stored as double so that ties in the random ordering are not created
	** by rounding to float.
	gen double randorder = runiform()

	** Random rank of each month within its zip, and number of months per zip
	bysort zip (randorder): gen ob = _n
	by zip: gen zipmonths = _N

	** For each zip code, keep half of the months for training.
	** The comparison is <= so that a zip with an even number of months is
	** split exactly in half (a strict < would give 1 of 4, or 0 of 2); with
	** an odd number of months the extra month goes to the estimation sample.
	gen trainingsample = ob <= zipmonths/2

	save "$Data/Trainingzips_time.dta", replace
	
	
	

	* -----------------------------------------------------------------------
	* Step 2: on the training months only, regress the high-PM indicator on
	* wind-direction bin x calendar month, separately for each zip2 group,
	* and save one coefficient file per group in the Results folder.
	* -----------------------------------------------------------------------
	use "$Data/GameUserWPdaily_norm.dta", clear
	gen monthyear = mofd(date)

	merge m:1 zip monthyear using "$Data/Trainingzips_time.dta"

	** Rows without a match have trainingsample missing and are dropped here
	keep if trainingsample == 1

	** High-pollution indicator. Left missing when PM is missing, so that
	** days without a PM reading are not counted as low-pollution days.
	gen highpm = weighted_dailypm > 25 if !missing(weighted_dailypm)

	** 60-degree wind-direction bins; cut() leaves 360 unassigned, so fold it
	** into the 0 bin
	egen cut_winddirection = cut(median_closestwind), at(0(60)360)
	replace cut_winddirection = 0 if median_closestwind == 360

	** Coarser zip grouping: zip with its last digit removed
	gen zip2 = int(zip/10)

	** Store the list of groups directly in a local. This avoids evaluating
	** the list as a string expression, which can truncate long lists.
	levelsof zip2, local(zip2)

	gen month = month(date)

	foreach zip of local zip2 {

		** The variable list is kept exactly as written because the saved
		** coefficient names are parsed by position in the next step
		quietly reghdfe highpm  i.cut_wind#i.month  if zip2 == `zip' ,   absorb( anon_id  monthyear) cluster(anon_id monthyear )

		regsave using "$Results/timesplitzipwind60month`zip'", replace

	}
	
	
	
	* -----------------------------------------------------------------------
	* Step 3: stack the per-group coefficient files, and for each zip2 x month
	* keep the wind-direction bin with the largest non-negative coefficient.
	* Writes timesplitdirectionsmonth_zip260.dta (zip2, month, maxdirection).
	* -----------------------------------------------------------------------
	clear
	gen zip2 = .

	** Stack every coefficient file that exists for groups 0 to 99 and tag
	** its rows with the group number. Checking for the file first means no
	** group has to be assumed present, and group 0 is no longer skipped.
	** NOTE(review): assumes zip2 never exceeds 99 - confirm against the data.
	forvalues z = 0/99 {

		capture confirm file "$Results/timesplitzipwind60month`z'.dta"
		if _rc continue

		append using "$Results/timesplitzipwind60month`z'"
		replace zip2 = `z' if zip2 == .

		** erase needs the file extension; without it the erase fails and the
		** intermediate files are left behind
		erase "$Results/timesplitzipwind60month`z'.dta"

	}

	if _N == 0 {
		display as error "no timesplitzipwind60month files found in the Results folder"
		error 601
	}

	** Recover the wind-direction bin from the first characters of the
	** coefficient name; names that do not start with a number become missing
	gen direction = substr(var,1,3)
	destring direction, replace force ignore("c" "."  "u" "ons" "b")

	** Recover the calendar month from the characters just before the
	** trailing month suffix of the coefficient name
	gen month = substr(var,-9,3)
	destring month, replace force ignore("c" "."  "u" "o" "b" "#" "n")

	** Ignore coefficients from regressions that did not report an R-squared
	replace coef = . if r2 == .

	** Largest coefficient within zip2 x month. Stored as double so that the
	** equality test below is exact whatever the storage type of coef.
	bysort zip2 month: egen double maxcoef = max(coef) if direction < . & coef < .

	gen maxdirection = direction if maxcoef == coef & maxcoef >= 0 & maxcoef < .

	** If several bins tie for the maximum, take the smallest bin so that the
	** result does not depend on the current row order. Without a tie there
	** is a single nonmissing value and min returns it.
	collapse (min) maxdirection , by(zip2 month)

	drop if month == .

	save "$Data/timesplitdirectionsmonth_zip260.dta", replace
	
	

	
	
	
	
	
	* -----------------------------------------------------------------------
	* Step 4: on the held-out months, instrument the high-PM indicator with
	* an indicator for wind blowing from the direction selected in training,
	* and store the estimates.
	* -----------------------------------------------------------------------
	use "$Data/GameUserWPdaily_norm.dta", clear

	gen monthyear = mofd(date)

	merge m:1 zip monthyear using "$Data/Trainingzips_time.dta"

	** Estimation sample: the zip-months not used for training
	keep if trainingsample == 0

	drop _merge

	gen month = month(date)

	** Same 60-degree wind-direction bins as in the training step
	egen cut_winddirection = cut(median_closestwind), at(0(60)360)
	replace cut_winddirection = 0 if median_closestwind == 360

	gen zip2 = int( zip/10)

	** Attach the direction selected for this zip2 x calendar month
	merge m:1 zip2 month using "$Data/timesplitdirectionsmonth_zip260.dta", keepusing(maxdirection)

	** High-pollution indicator. Left missing when PM is missing, so that
	** days without a PM reading are not counted as low-pollution days.
	gen highpm = weighted_dailypm > 25 if !missing(weighted_dailypm)

	** Instrument: wind comes from the selected direction. Missing when
	** either the wind bin or the selected direction is missing, so that days
	** with unknown wind direction are not coded as 0.
	gen maxzipwind = cut_winddirection == maxdirection if !missing(cut_winddirection, maxdirection)

	keep if maxzipwind < .

	** 10-degree temperature bins, absorbed as fixed effects below
	egen cut_temp = cut(weighted_temperature), at(-40(10)40)

ivreghdfe norm_score (highpm =maxzipwind )  weighted_wind_speed weighted_precipitation , absorb(anon_id monthyear cut_temp) cluster(anon_id monthyear)

eststo splitsample

estimates save "$Results/timesplitsample_highpm" , replace
	
		
		
